# -*- coding: utf-8 -*-
import urllib2
import json
from bs4 import BeautifulSoup

# Listing page that main() hands to parsePageNum and parseHousePage.
url = "http://bj.lianjia.com/ershoufang/dongcheng/"

# Page counters; parseHousePage reads totalPage to decide how many pages to visit.
totalPage, currPage = 0, 1
def parsePageNum(url):
    """Read the pagination info of a listing page into the module globals.

    Downloads ``url``, locates the pagination ``div`` and decodes the JSON
    stored in its ``page-data`` attribute. The ``totalPage`` and ``currPage``
    values from that JSON are written to the module-level variables of the
    same names and also returned.

    Args:
        url: Address of the listing page to inspect.

    Returns:
        A ``(totalPage, currPage)`` tuple.

    Raises:
        ValueError: If the page has no pagination element or that element
            carries no ``page-data`` attribute.
        KeyError: If the decoded JSON lacks ``totalPage`` or ``currPage``.
    """
    # Without this declaration the assignments below would only create
    # locals and the module-level counters would never change.
    global totalPage, currPage

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # urllib2 responses cannot be used in a ``with`` block; close by hand.
        response.close()
    soup = BeautifulSoup(content, "html.parser")

    # find() returns a single tag (or None); the keyword for filtering on the
    # CSS class is ``class_``.
    pageBoxDiv = soup.find("div", class_="page-box house-lst-page-box")
    if pageBoxDiv is None:
        raise ValueError("no pagination element found at %s" % url)

    pageDataStr = pageBoxDiv.get("page-data")
    if pageDataStr is None:
        raise ValueError("pagination element at %s has no page-data" % url)

    pageDataJson = json.loads(pageDataStr)
    totalPage = pageDataJson["totalPage"]
    currPage = pageDataJson["currPage"]
    return totalPage, currPage

def parseHousePage(url):
    """Call parseHouseInfo on every numbered page below ``url``.

    Page addresses are built as ``url + "pg" + <number>`` for the numbers
    1 through the module-level ``totalPage`` inclusive (numbering is taken
    to start at 1, matching the initial value of ``currPage``).

    Args:
        url: Base address of the listing; the page suffix is appended to it
            as-is, so it is expected to end with "/".
    """
    for pageNo in range(1, totalPage + 1):
        # The page number is an int and must be converted before joining.
        parseHouseInfo(url + "pg" + str(pageNo))


def parseHouseInfo(url):

    content = urllib2.urlopen(url).read()
    #req = urllib2.Request("file:/Users/quanyuelong/2017_workspace/crawler-core/src/dongcheng.html")
    #r = urllib2.urlopen(req)
    #content = r.read()
    soup = BeautifulSoup(content, "html.parser")
    ul = soup.find("ul", attrs={"class":"sellListContent"})

    lis = ul.find_all("li")

    for house in lis:
        houseId = house.find("div",attrs={"class":"title"}).find("a").get("href").replace("http://bj.lianjia.com/ershoufang/","").replace(".html","")
        houseTitle = house.find("div",attrs={"class":"title"}).find("a").text

        housePrice = house.find("div",attrs={"class":"totalPrice"}).find("span").string
        houseXiaoQu = house.find("div",attrs={"class":"houseInfo"}).find("a").get("href").replace("http://bj.lianjia.com/xiaoqu/","").replace("/","")
        houseInfo = house.find("div",attrs={"class":"houseInfo"}).text
        flood = house.find("div",attrs={"class":"flood"}).text
        followInfo = house.find("div",attrs={"class":"followInfo"}).text
        print houseId, "#####",houseXiaoQu, "#####",houseTitle, "#####",housePrice, "#####", houseInfo, "#####", followInfo


def main():
    """Call parsePageNum and then parseHousePage on the module-level url."""
    startUrl = url
    parsePageNum(startUrl)
    parseHousePage(startUrl)
# Run main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
